{
    "cells": [
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
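                "# Adult 数据集\n",
                "\n",
                "本笔记本通过 `main.py` 在 Adult 数据集上运行匿名化实验，比较原始数据（baseline）与不同 K 值匿名化后各分类模型的 acc、precision、recall 和 f1。"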
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
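                "## Mondrian\n",
                "\n",
                "本节各单元使用 `--anonymity_method mondrian`，依次对 `lr`、`nb`、`knn`、`svm` 等模型运行实验，K 从 2 取到 1000。第一次运行（`lr`）会执行匿名化算法并输出 NCP、CVAG、DM 和运行时间；之后的运行直接复用已生成的匿名化数据，只输出分类指标。\n",
                "\n",
                "注意：K 为 800、900、1000 时，`lr`、`nb`、`knn`、`svm` 的指标完全相同（acc=0.7724, f1=0.6323），原因有待确认。"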
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 1,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='mondrian', dataset='adult', experiment='Y', model='lr')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "LogisticRegression(max_iter=1000)\n",
                        "baseline: acc=0.8258, precision=0.7733, recall=0.7263, f1=0.7441\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.0426,  CVAG: 2.1520, DM: 236866, runtime: 2.5895s\n",
                        "acc=0.8251, precision=0.7726, recall=0.7236, f1=0.7419\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.1333,  CVAG: 2.2252, DM: 513256, runtime: 2.1874s\n",
                        "acc=0.8210, precision=0.7652, recall=0.7200, f1=0.7371\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2132,  CVAG: 2.1004, DM: 861416, runtime: 2.2575s\n",
                        "acc=0.8167, precision=0.7589, recall=0.7120, f1=0.7293\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2570,  CVAG: 2.0169, DM: 1172616, runtime: 2.0138s\n",
                        "acc=0.8202, precision=0.7636, recall=0.7201, f1=0.7367\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2945,  CVAG: 1.9586, DM: 1488604, runtime: 1.9413s\n",
                        "acc=0.8190, precision=0.7632, recall=0.7140, f1=0.7320\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3589,  CVAG: 2.2695, DM: 4636358, runtime: 1.9853s\n",
                        "acc=0.8153, precision=0.7575, recall=0.7079, f1=0.7259\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.4224,  CVAG: 2.4482, DM: 8621030, runtime: 1.8630s\n",
                        "acc=0.8124, precision=0.7515, recall=0.7073, f1=0.7237\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.4338,  CVAG: 2.4622, DM: 9405394, runtime: 1.8337s\n",
                        "acc=0.8104, precision=0.7491, recall=0.7016, f1=0.7187\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5214,  CVAG: 4.0216, DM: 25779338, runtime: 1.8356s\n",
                        "acc=0.8076, precision=0.7433, recall=0.7026, f1=0.7179\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6396,  CVAG: 4.7876, DM: 45887082, runtime: 1.7620s\n",
                        "acc=0.8093, precision=0.7455, recall=0.7072, f1=0.7219\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6545,  CVAG: 4.3089, DM: 49366150, runtime: 1.6776s\n",
                        "acc=0.8106, precision=0.7456, recall=0.7204, f1=0.7310\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6803,  CVAG: 5.0270, DM: 62427676, runtime: 1.6548s\n",
                        "acc=0.8101, precision=0.7504, recall=0.6961, f1=0.7147\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6873,  CVAG: 4.7876, DM: 77217462, runtime: 1.6721s\n",
                        "acc=0.7899, precision=0.7276, recall=0.6317, f1=0.6500\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7031,  CVAG: 4.7876, DM: 80704456, runtime: 1.5087s\n",
                        "acc=0.7878, precision=0.7122, recall=0.6636, f1=0.6792\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7071,  CVAG: 4.7128, DM: 82164014, runtime: 1.5569s\n",
                        "acc=0.7858, precision=0.7122, recall=0.6415, f1=0.6588\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7138,  CVAG: 4.7876, DM: 91812950, runtime: 1.5767s\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7138,  CVAG: 4.3089, DM: 91812950, runtime: 1.4363s\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7192,  CVAG: 3.8669, DM: 93457860, runtime: 1.4901s\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7213,  CVAG: 3.9171, DM: 96917970, runtime: 1.5467s\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7355,  CVAG: 3.7702, DM: 103811898, runtime: 1.3676s\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7355,  CVAG: 3.3513, DM: 103811898, runtime: 1.3815s\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7355,  CVAG: 3.0162, DM: 103811898, runtime: 1.3744s\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 3min 13s, sys: 48.9 s, total: 4min 2s\n",
                        "Wall time: 2min 16s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method mondrian --model lr --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 2,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='mondrian', dataset='adult', experiment='Y', model='nb')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "GaussianNB(var_smoothing=1.0)\n",
                        "baseline: acc=0.7985, precision=0.7297, recall=0.7419, f1=0.7353\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7661, precision=0.7082, recall=0.7554, f1=0.7201\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7785, precision=0.7119, recall=0.7435, f1=0.7232\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7742, precision=0.7007, recall=0.7170, f1=0.7076\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7931, precision=0.7201, recall=0.7134, f1=0.7166\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7907, precision=0.7157, recall=0.6930, f1=0.7024\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8019, precision=0.7393, recall=0.6749, f1=0.6944\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7973, precision=0.7306, recall=0.6697, f1=0.6882\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7963, precision=0.7338, recall=0.6550, f1=0.6749\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7978, precision=0.7319, recall=0.6688, f1=0.6876\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7719, precision=0.7383, recall=0.5533, f1=0.5386\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7878, precision=0.7565, recall=0.5973, f1=0.6069\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7977, precision=0.7447, recall=0.6434, f1=0.6641\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7332, precision=0.6300, recall=0.6177, f1=0.6226\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7350, precision=0.6377, recall=0.6315, f1=0.6343\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7600, precision=0.6885, recall=0.7139, f1=0.6977\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7491, precision=0.6798, recall=0.7097, f1=0.6894\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7491, precision=0.6798, recall=0.7097, f1=0.6894\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7491, precision=0.6798, recall=0.7097, f1=0.6894\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7312, precision=0.6153, recall=0.5913, f1=0.5979\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 22s, sys: 424 ms, total: 1min 23s\n",
                        "Wall time: 1min 23s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method mondrian --model nb --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 3,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='mondrian', dataset='adult', experiment='Y', model='knn')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "KNeighborsClassifier(n_neighbors=10)\n",
                        "baseline: acc=0.8215, precision=0.7675, recall=0.7172, f1=0.7356\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8199, precision=0.7678, recall=0.7079, f1=0.7284\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8178, precision=0.7644, recall=0.7044, f1=0.7248\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8149, precision=0.7577, recall=0.7047, f1=0.7234\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8187, precision=0.7624, recall=0.7141, f1=0.7319\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8080, precision=0.7457, recall=0.6966, f1=0.7139\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8041, precision=0.7398, recall=0.6883, f1=0.7059\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8073, precision=0.7428, recall=0.7023, f1=0.7175\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8035, precision=0.7408, recall=0.6811, f1=0.7001\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8036, precision=0.7435, recall=0.6748, f1=0.6951\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8014, precision=0.7399, recall=0.6706, f1=0.6907\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8085, precision=0.7484, recall=0.6912, f1=0.7103\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8101, precision=0.7504, recall=0.6961, f1=0.7147\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7851, precision=0.7116, recall=0.6372, f1=0.6544\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7841, precision=0.7142, recall=0.6234, f1=0.6398\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7771, precision=0.6947, recall=0.6282, f1=0.6435\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 5min 40s, sys: 1.65 s, total: 5min 42s\n",
                        "Wall time: 2min 50s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method mondrian --model knn --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 4,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='mondrian', dataset='adult', experiment='Y', model='svm')\n",
                        "----------------------------------------------------------------------------------------------------------\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "LinearSVC(dual=False)\n",
                        "baseline: acc=0.8272, precision=0.7757, recall=0.7274, f1=0.7456\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8253, precision=0.7737, recall=0.7223, f1=0.7412\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8211, precision=0.7669, recall=0.7161, f1=0.7347\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8160, precision=0.7595, recall=0.7065, f1=0.7253\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8208, precision=0.7652, recall=0.7188, f1=0.7362\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8196, precision=0.7643, recall=0.7147, f1=0.7329\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8160, precision=0.7582, recall=0.7099, f1=0.7275\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8079, precision=0.7498, recall=0.6845, f1=0.7049\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8087, precision=0.7480, recall=0.6940, f1=0.7124\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8078, precision=0.7431, recall=0.7057, f1=0.7201\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8108, precision=0.7510, recall=0.6985, f1=0.7168\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8116, precision=0.7530, recall=0.6975, f1=0.7165\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8094, precision=0.7528, recall=0.6858, f1=0.7066\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7891, precision=0.7324, recall=0.6214, f1=0.6382\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7870, precision=0.7122, recall=0.6532, f1=0.6701\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7821, precision=0.7220, recall=0.6013, f1=0.6128\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 45s, sys: 20.7 s, total: 2min 6s\n",
                        "Wall time: 1min 28s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method mondrian --model svm --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 5,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='mondrian', dataset='adult', experiment='Y', model='gbt')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "GradientBoostingClassifier(learning_rate=1.0, max_depth=1, random_state=0)\n",
                        "baseline: acc=0.8272, precision=0.7796, recall=0.7193, f1=0.7404\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8297, precision=0.7801, recall=0.7302, f1=0.7490\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8255, precision=0.7737, recall=0.7235, f1=0.7421\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7870, precision=0.7132, recall=0.7163, f1=0.7147\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8233, precision=0.7702, recall=0.7202, f1=0.7386\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8202, precision=0.7631, recall=0.7215, f1=0.7376\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8170, precision=0.7603, recall=0.7101, f1=0.7283\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8167, precision=0.7600, recall=0.7090, f1=0.7273\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8157, precision=0.7604, recall=0.7024, f1=0.7222\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8079, precision=0.7431, recall=0.7070, f1=0.7210\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8093, precision=0.7455, recall=0.7072, f1=0.7219\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8106, precision=0.7456, recall=0.7204, f1=0.7310\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8121, precision=0.7479, recall=0.7225, f1=0.7332\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7919, precision=0.7218, recall=0.6581, f1=0.6762\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7870, precision=0.7122, recall=0.6532, f1=0.6701\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7858, precision=0.7122, recall=0.6415, f1=0.6588\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7750, precision=0.6897, recall=0.6373, f1=0.6518\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7724, precision=0.6853, recall=0.6184, f1=0.6323\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 56s, sys: 923 ms, total: 1min 57s\n",
                        "Wall time: 1min 56s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method mondrian --model gbt --experiment Y "
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## TDG"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 6,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='tdg', dataset='adult', experiment='Y', model='lr')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "LogisticRegression(max_iter=1000)\n",
                        "baseline: acc=0.8258, precision=0.7733, recall=0.7263, f1=0.7441\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.0378,  CVAG: 1.5148, DM: 159644, runtime: 24.0716s\n",
                        "acc=0.8232, precision=0.7688, recall=0.7230, f1=0.7404\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.0984,  CVAG: 1.3852, DM: 256294, runtime: 23.0692s\n",
                        "acc=0.8210, precision=0.7679, recall=0.7133, f1=0.7328\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.1650,  CVAG: 1.3154, DM: 462122, runtime: 21.5335s\n",
                        "acc=0.8136, precision=0.7560, recall=0.7014, f1=0.7204\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2110,  CVAG: 1.3023, DM: 664772, runtime: 21.1863s\n",
                        "acc=0.8096, precision=0.7521, recall=0.6885, f1=0.7088\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2510,  CVAG: 1.2802, DM: 867874, runtime: 19.7972s\n",
                        "acc=0.8022, precision=0.7376, recall=0.6819, f1=0.7001\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3206,  CVAG: 1.2940, DM: 1684948, runtime: 18.7399s\n",
                        "acc=0.8047, precision=0.7432, recall=0.6824, f1=0.7017\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3554,  CVAG: 1.2781, DM: 1760778, runtime: 17.8741s\n",
                        "acc=0.7999, precision=0.7357, recall=0.6717, f1=0.6909\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3997,  CVAG: 1.3346, DM: 3099034, runtime: 17.5745s\n",
                        "acc=0.7951, precision=0.7294, recall=0.6579, f1=0.6772\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5377,  CVAG: 1.4094, DM: 6407414, runtime: 15.7137s\n",
                        "acc=0.7968, precision=0.7356, recall=0.6537, f1=0.6739\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6159,  CVAG: 1.4895, DM: 10057272, runtime: 14.4508s\n",
                        "acc=0.7814, precision=0.7084, recall=0.6188, f1=0.6342\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6767,  CVAG: 1.5709, DM: 14946200, runtime: 13.4843s\n",
                        "acc=0.7873, precision=0.7249, recall=0.6225, f1=0.6393\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7345,  CVAG: 1.7742, DM: 36894774, runtime: 13.0931s\n",
                        "acc=0.7712, precision=0.6833, recall=0.6122, f1=0.6253\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7545,  CVAG: 1.7334, DM: 36185548, runtime: 12.9659s\n",
                        "acc=0.7495, precision=0.6179, recall=0.5416, f1=0.5292\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.7885,  CVAG: 1.9586, DM: 56545120, runtime: 12.4236s\n",
                        "acc=0.7535, precision=0.6176, recall=0.5128, f1=0.4658\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8074,  CVAG: 1.9843, DM: 54699862, runtime: 11.3254s\n",
                        "acc=0.7757, precision=0.6943, recall=0.6129, f1=0.6266\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8255,  CVAG: 2.1622, DM: 76157598, runtime: 11.6725s\n",
                        "acc=0.7619, precision=0.6636, recall=0.5643, f1=0.5621\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8245,  CVAG: 2.1544, DM: 91518584, runtime: 11.2032s\n",
                        "acc=0.7745, precision=0.6966, recall=0.5963, f1=0.6063\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8852,  CVAG: 2.3938, DM: 129873306, runtime: 10.5783s\n",
                        "acc=0.7540, precision=0.6246, recall=0.5138, f1=0.4676\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8954,  CVAG: 2.5346, DM: 161583202, runtime: 10.3135s\n",
                        "acc=0.7669, precision=0.6820, recall=0.5693, f1=0.5686\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.8859,  CVAG: 2.2178, DM: 122638286, runtime: 10.9563s\n",
                        "acc=0.7827, precision=0.7153, recall=0.6136, f1=0.6283\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.9023,  CVAG: 2.7928, DM: 155790686, runtime: 10.1237s\n",
                        "acc=0.7570, precision=0.6529, recall=0.5241, f1=0.4889\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.9181,  CVAG: 3.0162, DM: 295122886, runtime: 9.5990s\n",
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 7min 57s, sys: 44.4 s, total: 8min 41s\n",
                        "Wall time: 7min 8s\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method tdg --model lr --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 7,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='tdg', dataset='adult', experiment='Y', model='nb')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "GaussianNB(var_smoothing=1.0)\n",
                        "baseline: acc=0.7985, precision=0.7297, recall=0.7419, f1=0.7353\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7490, precision=0.6969, recall=0.7484, f1=0.7061\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7596, precision=0.7007, recall=0.7457, f1=0.7120\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7596, precision=0.6951, recall=0.7319, f1=0.7060\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7668, precision=0.6962, recall=0.7218, f1=0.7057\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7661, precision=0.6926, recall=0.7126, f1=0.7006\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7764, precision=0.7027, recall=0.7172, f1=0.7090\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7714, precision=0.6913, recall=0.6906, f1=0.6909\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7605, precision=0.6786, recall=0.6827, f1=0.6805\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7762, precision=0.6923, recall=0.6576, f1=0.6697\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7733, precision=0.6949, recall=0.5919, f1=0.6005\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7683, precision=0.6773, recall=0.6013, f1=0.6124\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7685, precision=0.6961, recall=0.5614, f1=0.5549\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7583, precision=0.6514, recall=0.5584, f1=0.5538\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7688, precision=0.7193, recall=0.5493, f1=0.5326\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7589, precision=0.6574, recall=0.5398, f1=0.5204\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7760, precision=0.7083, recall=0.5874, f1=0.5941\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7575, precision=0.6488, recall=0.5593, f1=0.5555\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 24s, sys: 897 ms, total: 1min 25s\n",
                        "Wall time: 1min 23s\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method tdg --model nb --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 8,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='tdg', dataset='adult', experiment='Y', model='knn')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "KNeighborsClassifier(n_neighbors=10)\n",
                        "baseline: acc=0.8215, precision=0.7675, recall=0.7172, f1=0.7356\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8182, precision=0.7650, recall=0.7054, f1=0.7258\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8131, precision=0.7574, recall=0.6955, f1=0.7159\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8146, precision=0.7620, recall=0.6933, f1=0.7150\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8087, precision=0.7504, recall=0.6874, f1=0.7076\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8011, precision=0.7374, recall=0.6749, f1=0.6941\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8036, precision=0.7417, recall=0.6794, f1=0.6988\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8044, precision=0.7446, recall=0.6768, f1=0.6971\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7914, precision=0.7239, recall=0.6477, f1=0.6665\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7946, precision=0.7292, recall=0.6548, f1=0.6742\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7876, precision=0.7187, recall=0.6358, f1=0.6537\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7883, precision=0.7191, recall=0.6393, f1=0.6574\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7867, precision=0.7152, recall=0.6389, f1=0.6565\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7690, precision=0.6831, recall=0.5846, f1=0.5909\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7414, precision=0.6112, recall=0.5604, f1=0.5612\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7696, precision=0.6845, recall=0.5859, f1=0.5926\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7594, precision=0.6603, recall=0.5406, f1=0.5216\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7560, precision=0.6566, recall=0.6212, f1=0.6317\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7455, precision=0.5838, recall=0.5203, f1=0.4908\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7743, precision=0.6890, recall=0.6254, f1=0.6399\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7739, precision=0.6881, recall=0.6246, f1=0.6390\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7570, precision=0.6529, recall=0.5241, f1=0.4889\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 6min 25s, sys: 541 ms, total: 6min 25s\n",
                        "Wall time: 3min 26s\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method tdg --model knn --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 9,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='tdg', dataset='adult', experiment='Y', model='svm')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "LinearSVC(dual=False)\n",
                        "baseline: acc=0.8272, precision=0.7757, recall=0.7274, f1=0.7456\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8246, precision=0.7719, recall=0.7230, f1=0.7413\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8213, precision=0.7705, recall=0.7095, f1=0.7303\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8131, precision=0.7579, recall=0.6945, f1=0.7151\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8086, precision=0.7517, recall=0.6840, f1=0.7048\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8063, precision=0.7466, recall=0.6828, f1=0.7028\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8053, precision=0.7470, recall=0.6762, f1=0.6970\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7991, precision=0.7394, recall=0.6588, f1=0.6794\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7931, precision=0.7315, recall=0.6419, f1=0.6613\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7989, precision=0.7430, recall=0.6513, f1=0.6724\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7814, precision=0.7084, recall=0.6188, f1=0.6342\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7877, precision=0.7227, recall=0.6283, f1=0.6458\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7741, precision=0.6949, recall=0.5976, f1=0.6080\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7557, precision=0.6405, recall=0.5391, f1=0.5210\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7520, precision=0.5639, recall=0.5032, f1=0.4428\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7720, precision=0.6915, recall=0.5894, f1=0.5971\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7646, precision=0.6694, recall=0.5841, f1=0.5907\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7745, precision=0.6966, recall=0.5963, f1=0.6063\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7540, precision=0.6246, recall=0.5138, f1=0.4676\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7669, precision=0.6820, recall=0.5693, f1=0.5686\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7827, precision=0.7153, recall=0.6136, f1=0.6283\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7570, precision=0.6529, recall=0.5241, f1=0.4889\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 44s, sys: 20.7 s, total: 2min 4s\n",
                        "Wall time: 1min 25s\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method tdg --model svm --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 10,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='tdg', dataset='adult', experiment='Y', model='gbt')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "GradientBoostingClassifier(learning_rate=1.0, max_depth=1, random_state=0)\n",
                        "baseline: acc=0.8272, precision=0.7796, recall=0.7193, f1=0.7404\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8284, precision=0.7768, recall=0.7311, f1=0.7487\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8157, precision=0.7584, recall=0.7075, f1=0.7258\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8151, precision=0.7600, recall=0.7003, f1=0.7204\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8122, precision=0.7565, recall=0.6928, f1=0.7134\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8040, precision=0.7402, recall=0.6863, f1=0.7043\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8045, precision=0.7409, recall=0.6878, f1=0.7058\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7998, precision=0.7342, recall=0.6756, f1=0.6940\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7978, precision=0.7353, recall=0.6599, f1=0.6799\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7968, precision=0.7356, recall=0.6537, f1=0.6739\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7799, precision=0.7058, recall=0.6141, f1=0.6286\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7857, precision=0.7109, recall=0.6458, f1=0.6629\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7825, precision=0.7126, recall=0.6171, f1=0.6324\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7488, precision=0.6191, recall=0.5473, f1=0.5391\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7628, precision=0.6848, recall=0.5403, f1=0.5187\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7757, precision=0.6943, recall=0.6129, f1=0.6266\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7646, precision=0.6694, recall=0.5841, f1=0.5907\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7745, precision=0.6966, recall=0.5963, f1=0.6063\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7540, precision=0.6246, recall=0.5138, f1=0.4676\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7669, precision=0.6820, recall=0.5693, f1=0.5686\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7827, precision=0.7153, recall=0.6136, f1=0.6283\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7570, precision=0.6529, recall=0.5241, f1=0.4889\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.3772, recall=0.5000, f1=0.4300\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 53s, sys: 867 ms, total: 1min 54s\n",
                        "Wall time: 1min 53s\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method tdg --model gbt --experiment Y "
            ]
        },
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "## OKA"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 11,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='oka', dataset='adult', experiment='Y', model='lr')\n",
                        "----------------------------------------------------------------------------------------------------------\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "LogisticRegression(max_iter=1000)\n",
                        "baseline: acc=0.8258, precision=0.7733, recall=0.7263, f1=0.7441\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.0541,  CVAG: 1.2208, DM: 45779343, runtime: 1679.8495s\n",
                        "acc=0.8257, precision=0.7749, recall=0.7245, f1=0.7432\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.1436,  CVAG: 1.0572, DM: 74581866, runtime: 1026.6293s\n",
                        "acc=0.8151, precision=0.7572, recall=0.7075, f1=0.7254\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2293,  CVAG: 1.0249, DM: 78215756, runtime: 556.6800s\n",
                        "acc=0.8048, precision=0.7406, recall=0.6913, f1=0.7085\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2646,  CVAG: 1.0436, DM: 67648719, runtime: 385.0704s\n",
                        "acc=0.8091, precision=0.7460, recall=0.7021, f1=0.7183\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.2835,  CVAG: 1.0962, DM: 63494349, runtime: 297.9796s\n",
                        "acc=0.7913, precision=0.7281, recall=0.6821, f1=0.6977\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3091,  CVAG: 1.2075, DM: 49972807, runtime: 203.6901s\n",
                        "acc=0.8021, precision=0.7379, recall=0.7035, f1=0.7168\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3481,  CVAG: 1.2239, DM: 44630575, runtime: 152.6138s\n",
                        "acc=0.7970, precision=0.7325, recall=0.6889, f1=0.7043\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.3751,  CVAG: 1.3356, DM: 42834133, runtime: 123.4418s\n",
                        "acc=0.7827, precision=0.7159, recall=0.6707, f1=0.6855\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.4639,  CVAG: 1.3357, DM: 54488791, runtime: 63.4988s\n",
                        "acc=0.7776, precision=0.7031, recall=0.6567, f1=0.6711\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5207,  CVAG: 1.4532, DM: 56307741, runtime: 42.4566s\n",
                        "acc=0.7720, precision=0.7049, recall=0.6465, f1=0.6615\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5214,  CVAG: 1.5345, DM: 56660561, runtime: 32.1651s\n",
                        "acc=0.7519, precision=0.6809, recall=0.6669, f1=0.6727\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5808,  CVAG: 1.4607, DM: 37860561, runtime: 26.4205s\n",
                        "acc=0.7527, precision=0.6480, recall=0.5653, f1=0.5641\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.5947,  CVAG: 1.4085, DM: 61965575, runtime: 22.2303s\n",
                        "acc=0.7544, precision=0.6819, recall=0.6328, f1=0.6448\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6138,  CVAG: 1.7099, DM: 61710610, runtime: 19.4776s\n",
                        "acc=0.7531, precision=0.6674, recall=0.6234, f1=0.6346\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6309,  CVAG: 1.5639, DM: 118827071, runtime: 17.0898s\n",
                        "acc=0.7611, precision=0.6807, recall=0.5186, f1=0.4753\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6417,  CVAG: 1.4502, DM: 70957865, runtime: 15.5590s\n",
                        "acc=0.7470, precision=0.5166, recall=0.5001, f1=0.4286\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6239,  CVAG: 1.5019, DM: 93547970, runtime: 14.0940s\n",
                        "acc=0.7521, precision=0.6714, recall=0.5490, f1=0.5331\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6739,  CVAG: 1.8266, DM: 88322129, runtime: 11.9459s\n",
                        "acc=0.7486, precision=0.6453, recall=0.5556, f1=0.5481\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6714,  CVAG: 1.6814, DM: 63085126, runtime: 10.6706s\n",
                        "acc=0.7280, precision=0.6382, recall=0.5423, f1=0.5205\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6889,  CVAG: 1.5854, DM: 78687441, runtime: 9.5333s\n",
                        "acc=0.7497, precision=0.5877, recall=0.5155, f1=0.4775\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6453,  CVAG: 2.0444, DM: 71506211, runtime: 8.5064s\n",
                        "acc=0.7385, precision=0.6663, recall=0.6170, f1=0.6265\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据不存在, 执行匿名化算法\n",
                        "NCP: 0.6821,  CVAG: 1.9187, DM: 128284885, runtime: 7.7395s\n",
                        "acc=0.7591, precision=0.7065, recall=0.5785, f1=0.5774\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1h 20min 52s, sys: 57 s, total: 1h 21min 49s\n",
                        "Wall time: 1h 20min 13s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method oka --model lr --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 12,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='oka', dataset='adult', experiment='Y', model='nb')\n",
                        "----------------------------------------------------------------------------------------------------------\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "GaussianNB(var_smoothing=1.0)\n",
                        "baseline: acc=0.7985, precision=0.7297, recall=0.7419, f1=0.7353\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.7014, recall=0.7520, f1=0.7114\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7581, precision=0.6944, recall=0.7325, f1=0.7054\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7691, precision=0.6968, recall=0.7183, f1=0.7053\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7763, precision=0.7049, recall=0.7266, f1=0.7136\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7527, precision=0.6886, recall=0.7142, f1=0.6975\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7458, precision=0.6920, recall=0.7367, f1=0.7013\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7511, precision=0.6900, recall=0.7250, f1=0.7000\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7427, precision=0.6986, recall=0.7464, f1=0.7060\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7345, precision=0.6746, recall=0.7084, f1=0.6832\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7596, precision=0.6975, recall=0.7173, f1=0.7052\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7316, precision=0.6781, recall=0.7053, f1=0.6857\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7083, precision=0.6142, recall=0.6159, f1=0.6150\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7266, precision=0.6665, recall=0.6852, f1=0.6730\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7437, precision=0.6541, recall=0.6246, f1=0.6337\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7160, precision=0.6663, recall=0.7147, f1=0.6709\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7374, precision=0.6622, recall=0.6769, f1=0.6680\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7381, precision=0.6344, recall=0.5914, f1=0.5988\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7258, precision=0.6353, recall=0.6307, f1=0.6328\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.6959, precision=0.6280, recall=0.6354, f1=0.6310\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.6411, precision=0.6631, recall=0.7192, f1=0.6238\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.6944, precision=0.6483, recall=0.6717, f1=0.6528\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.6991, precision=0.6398, recall=0.6628, f1=0.6452\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 11s, sys: 900 ms, total: 1min 12s\n",
                        "Wall time: 1min 11s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method oka --model nb --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 13,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='oka', dataset='adult', experiment='Y', model='knn')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "KNeighborsClassifier(n_neighbors=10)\n",
                        "baseline: acc=0.8215, precision=0.7675, recall=0.7172, f1=0.7356\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8144, precision=0.7584, recall=0.7034, f1=0.7225\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8032, precision=0.7400, recall=0.6815, f1=0.7003\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7964, precision=0.7290, recall=0.6684, f1=0.6867\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8077, precision=0.7471, recall=0.6883, f1=0.7076\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7915, precision=0.7297, recall=0.6765, f1=0.6933\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7967, precision=0.7304, recall=0.6900, f1=0.7047\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7957, precision=0.7366, recall=0.6665, f1=0.6860\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7735, precision=0.7009, recall=0.6572, f1=0.6709\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7796, precision=0.7113, recall=0.6370, f1=0.6535\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7628, precision=0.7111, recall=0.5856, f1=0.5880\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7432, precision=0.6655, recall=0.5685, f1=0.5640\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7481, precision=0.6463, recall=0.6041, f1=0.6136\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7545, precision=0.6821, recall=0.6335, f1=0.6455\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7381, precision=0.6375, recall=0.5932, f1=0.6007\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7494, precision=0.6263, recall=0.5706, f1=0.5745\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7388, precision=0.6640, recall=0.6791, f1=0.6700\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7338, precision=0.6319, recall=0.6003, f1=0.6080\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7411, precision=0.6255, recall=0.5601, f1=0.5579\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7155, precision=0.6065, recall=0.5536, f1=0.5471\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7365, precision=0.5800, recall=0.5343, f1=0.5233\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7357, precision=0.6626, recall=0.5894, f1=0.5931\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7478, precision=0.6999, recall=0.5397, f1=0.5113\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 5min 10s, sys: 891 ms, total: 5min 11s\n",
                        "Wall time: 2min 13s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method oka --model knn --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 14,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='oka', dataset='adult', experiment='Y', model='svm')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "LinearSVC(dual=False)\n",
                        "baseline: acc=0.8272, precision=0.7757, recall=0.7274, f1=0.7456\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8248, precision=0.7742, recall=0.7214, f1=0.7407\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8145, precision=0.7576, recall=0.7025, f1=0.7216\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8022, precision=0.7379, recall=0.6811, f1=0.6995\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8094, precision=0.7489, recall=0.6940, f1=0.7127\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7912, precision=0.7306, recall=0.6716, f1=0.6892\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7999, precision=0.7380, recall=0.6848, f1=0.7024\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7978, precision=0.7348, recall=0.6852, f1=0.7020\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7826, precision=0.7164, recall=0.6663, f1=0.6818\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7764, precision=0.7014, recall=0.6504, f1=0.6652\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7709, precision=0.7039, recall=0.6407, f1=0.6557\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7528, precision=0.6831, recall=0.6734, f1=0.6777\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7527, precision=0.6480, recall=0.5653, f1=0.5641\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.6819, recall=0.6328, f1=0.6448\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7526, precision=0.6654, recall=0.6132, f1=0.6241\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7611, precision=0.6807, recall=0.5186, f1=0.4753\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7474, precision=0.3737, recall=0.5000, f1=0.4277\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7506, precision=0.6663, recall=0.5447, f1=0.5259\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7486, precision=0.6453, recall=0.5556, f1=0.5481\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7280, precision=0.6382, recall=0.5423, f1=0.5205\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7497, precision=0.5877, recall=0.5155, f1=0.4775\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7385, precision=0.6663, recall=0.6170, f1=0.6265\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7591, precision=0.7065, recall=0.5785, f1=0.5774\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 31s, sys: 20.6 s, total: 1min 52s\n",
                        "Wall time: 1min 13s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method oka --model svm --experiment Y "
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 15,
            "metadata": {},
            "outputs": [
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "----------------------------------------------------------------------------------------------------------\n",
                        "Namespace(anonymity=50, anonymity_method='oka', dataset='adult', experiment='Y', model='gbt')\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "GradientBoostingClassifier(learning_rate=1.0, max_depth=1, random_state=0)\n",
                        "baseline: acc=0.8272, precision=0.7796, recall=0.7193, f1=0.7404\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 2\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8214, precision=0.7739, recall=0.7062, f1=0.7284\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 5\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8152, precision=0.7574, recall=0.7077, f1=0.7256\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 10\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8042, precision=0.7380, recall=0.6966, f1=0.7119\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 15\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8125, precision=0.7513, recall=0.7071, f1=0.7235\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 20\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7985, precision=0.7383, recall=0.6978, f1=0.7126\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 30\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.8004, precision=0.7360, recall=0.6974, f1=0.7118\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 40\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7951, precision=0.7274, recall=0.7036, f1=0.7135\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 50\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7819, precision=0.7137, recall=0.6779, f1=0.6907\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 100\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7630, precision=0.6772, recall=0.6239, f1=0.6366\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 150\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7727, precision=0.7037, recall=0.6657, f1=0.6784\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 200\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7530, precision=0.6846, recall=0.6789, f1=0.6816\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 250\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7509, precision=0.6438, recall=0.5706, f1=0.5723\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 300\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7544, precision=0.6819, recall=0.6328, f1=0.6448\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 350\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7526, precision=0.6654, recall=0.6132, f1=0.6241\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 400\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7611, precision=0.6807, recall=0.5186, f1=0.4753\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 450\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7470, precision=0.5166, recall=0.5001, f1=0.4286\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 500\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7521, precision=0.6714, recall=0.5490, f1=0.5331\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 600\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7486, precision=0.6453, recall=0.5556, f1=0.5481\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 700\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7280, precision=0.6382, recall=0.5423, f1=0.5205\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 800\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n"
                    ]
                },
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "/home/user/anaconda3/envs/k-anonymity/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
                        "  _warn_prf(average, modifier, msg_start, len(result))\n"
                    ]
                },
                {
                    "name": "stdout",
                    "output_type": "stream",
                    "text": [
                        "acc=0.7555, precision=0.3777, recall=0.5000, f1=0.4303\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 900\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7385, precision=0.6663, recall=0.6170, f1=0.6265\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "K = 1000\n",
                        "匿名化数据已存在, 直接开始机器学习实验\n",
                        "acc=0.7591, precision=0.7065, recall=0.5785, f1=0.5774\n",
                        "----------------------------------------------------------------------------------------------------------\n",
                        "CPU times: user 1min 40s, sys: 685 ms, total: 1min 41s\n",
                        "Wall time: 1min 39s\n"
                    ]
                }
            ],
            "source": [
                "%%time\n",
                "%run main.py --dataset adult --anonymity_method oka --model gbt --experiment Y "
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "k-anonymity",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.19"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 2
}
